import requests
from bs4 import BeautifulSoup
import pandas as pd
import jieba
import wordcloud
from matplotlib import pyplot as plt



# XML comment feed to analyse (one fixed Bilibili comment file).
url = 'https://comment.bilibili.com/519456863.xml'

# Download the feed. The timeout stops the script from hanging forever on a
# stalled connection, and raise_for_status() aborts with requests.HTTPError on
# a 4xx/5xx response instead of silently parsing an error page as comments.
request = requests.get(url, timeout=10)
request.raise_for_status()
# The payload is Chinese text; force UTF-8 decoding so it is not garbled.
request.encoding = 'utf8'

soup = BeautifulSoup(request.text, 'lxml')
results = soup.find_all('d')  # every <d> element; each one's text is treated as a comment
comments = [comment.text for comment in results]  # plain text of each element
comments = [x.upper() for x in comments]  # normalize letter case
comments_clean = [comment.replace(' ', '') for comment in comments]  # strip spaces

# Inspection step: the distinct normalized comments, used to spot junk entries
# by eye. The value is neither stored nor printed, so it is only visible when
# this line is evaluated in an interactive session.
set(comments_clean)

# Junk entries to discard (exact matches against the upper-cased,
# space-stripped comments).
useless_words = [
    '//TEST', '/TESR', '/TEST', '/TEST/', '/TEXT',
    '/TEXTSUPREME', '/TSET', '/Y', '\\TEST',
]

# Keep only the comments that are not an exact match for a junk entry.
comments_clean = list(
    filter(lambda text: text not in useless_words, comments_clean)
)

# One-column frame of the remaining comments, for frequency inspection.
cipin = pd.DataFrame(dict(danmu=comments_clean))
# Inspection step: how often each whole comment occurs (result is discarded).
cipin['danmu'].value_counts()

# Merge all comments into one string for segmentation. They are joined with a
# newline rather than the empty string: with no separator, the end of one
# comment and the start of the next are adjacent, so the segmenter can produce
# "words" that straddle two unrelated comments.
danmustr = '\n'.join(comments_clean)
words = list(jieba.cut(danmustr))  # word segmentation
# Drop single-character tokens; this also removes the one-character newline
# separators introduced above.
fnl_words = [word for word in words if len(word) > 1]

# Canvas size and font for the word cloud. A font must be specified
# explicitly, otherwise the Chinese words are not displayed.
wc = wordcloud.WordCloud(width=1000, font_path='simfang.ttf', height=800)
wc.generate(' '.join(fnl_words))

# Save before showing: plt.show() may block until the window is closed, and the
# image file should be written regardless of how the viewer is dismissed.
wc.to_file(r"C:\Users\King M\Desktop\danmu_pic1.png")

# Display the image. Without plt.show() nothing appears when this runs as a
# plain script, because imshow() only draws onto the current figure.
plt.imshow(wc)
plt.show()


# ######################################################
# #加蒙板的图片
# # import cv2
# #
# # img = cv2.imread(r'C:\Users\King M\Desktop\circle.png', cv2.IMREAD_UNCHANGED) #直接读取成了数字格式
# # resized =  cv2.resize(img, (800, 800),interpolation = cv2.INTER_AREA)#我们把它重新设定一下大小
#
# #不过有的时候用cv2不是非常稳定，所以我们还有另一种方法
# from PIL import Image
# import numpy as np
#
# img = Image.open(r'C:\Users\King M\Desktop\yuan.jpg')
# resized = np.array(img)
#
# wc_1 = wordcloud.WordCloud(
#     background_color='black',
#     width=1000,
#     height=800,
#     mask=resized,
#     font_path='simfang.ttf'
# #    ,color_func = wordcloud.random_color_func()
# )
#
#
# wc_1.generate_from_text(' '.join(fnl_words))#绘制图片
# plt.imshow(wc_1)
# plt.axis('off')
# plt.figure()
# plt.show()  #显示图片
# wc_1.to_file(r'C:\Users\King M\Desktop\danmu_pic_2.png')
